import pandas as pd
import math
import numpy as np
# ---------------------------------------
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pylab
plt.rcParams['figure.figsize']=(17,5)
# ---------------------------------------
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import iplot
import warnings
warnings.filterwarnings("ignore")
def line_plot(df, date, I, text, y_tiltle, x_title):
    """Display a single-series line chart in the notebook.

    Parameters
    ----------
    df : object indexable by column label (``df[date]``, ``df[I]``)
    date : label of the column plotted on the x axis
    I : label of the column plotted on the y axis; also used as trace name
    text : figure title, anchored at the top centre of the figure
    y_tiltle : y-axis title (spelling kept so keyword callers keep working)
    x_title : x-axis title

    The figure is shown through ``iplot``; nothing is returned.
    """
    trace = go.Scatter(x=df[date], y=df[I], mode='lines', name=I)
    title_spec = {
        'text': text,
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
    figure_layout = go.Layout(
        title=title_spec,
        xaxis={'title': x_title},
        yaxis={'title': y_tiltle},
        template='plotly_dark',
    )
    iplot(go.Figure(data=trace, layout=figure_layout))
# --------------------------------------------------------------------
def line_3plot(df, date, columns, text, y_title, x_title):
    """Display one line trace per entry of ``columns`` on a shared figure.

    Parameters
    ----------
    df : object indexable by column label
    date : label of the column plotted on the x axis of every trace
    columns : iterable of column labels; each becomes a trace named after
        its label
    text : figure title, anchored at the top centre of the figure
    y_title : y-axis title
    x_title : x-axis title

    The figure is shown through ``iplot``; nothing is returned.
    """
    traces = [
        go.Scatter(x=df[date], y=df[label], mode='lines', name=label)
        for label in columns
    ]
    title_spec = dict(text=text, y=0.9, x=0.5,
                      xanchor='center', yanchor='top')
    figure_layout = go.Layout(
        title=title_spec,
        xaxis={'title': x_title},
        yaxis={'title': y_title},
        template='plotly_dark',
    )
    iplot(go.Figure(data=traces, layout=figure_layout))
# ---------------------------------------
def bar_plot(col1, col2, agg_func, title, xtitle, ytitle, frame=None):
    """Display a bar chart of ``col2`` aggregated per value of ``col1``.

    Parameters
    ----------
    col1 : label of the column to group by; its values form the x axis
    col2 : label of the column that is aggregated; the result is the bar
        height and, rounded to two decimals, the label above each bar
    agg_func : aggregation passed to ``DataFrame.agg`` (e.g. ``'sum'``)
    title : figure title, anchored at the top centre of the figure
    xtitle : x-axis title
    ytitle : y-axis title
    frame : optional DataFrame to aggregate. When omitted, the module-level
        ``df`` is used, which is what existing callers rely on.

    The figure is shown through ``iplot``; nothing is returned.
    """
    source = df if frame is None else frame
    # Aggregate once and reuse the result for x, y and the bar labels
    # instead of repeating the same groupby three times.
    grouped = source.groupby(col1).agg({col2: agg_func}).reset_index()
    data = go.Bar(x=grouped[col1],
                  y=grouped[col2],
                  text=grouped[col2].round(2),
                  textposition='outside')
    layout = go.Layout(title={'text': title,
                              'y': 0.9,
                              'x': 0.5,
                              'xanchor': 'center',
                              'yanchor': 'top'},
                       xaxis=dict(title=xtitle),
                       yaxis=dict(title=ytitle),
                       template='plotly_dark')
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)
# Load the 2019 input file with 'Datetime' parsed as the index, then order
# the rows by that timestamp.
df = pd.read_csv(
    "data/input_data2019.csv",
    parse_dates=['Datetime'],
    index_col='Datetime',
)
df = df.sort_values('Datetime')
df.head()
| Datetime | I1 | I2 | I3 |
|---|---|---|---|
| 2019-01-01 00:00:00 | 141.224344 | 141.851126 | 144.712441 |
| 2019-01-01 01:00:00 | 124.928353 | 122.933446 | 124.464044 |
| 2019-01-01 02:00:00 | 115.902869 | 114.001158 | 114.375987 |
| 2019-01-01 03:00:00 | 108.653699 | 107.037666 | 107.096276 |
| 2019-01-01 04:00:00 | 103.072493 | 99.546536 | 102.537506 |
# Derive calendar columns from the timestamp index so the EDA plots can
# group by hour, weekday, month, etc.; rows containing NaN are dropped last.
df = df.assign(date=df.index)
df = df.assign(
    hour=df['date'].dt.hour,
    day_of_week=df['date'].dt.dayofweek,
    weekday_name=df['date'].dt.day_name(),
    quarter=df['date'].dt.quarter,
    month=df['date'].dt.month,
    year=df['date'].dt.year,
    day_of_year=df['date'].dt.dayofyear,
    day_of_month=df['date'].dt.day,
).dropna()
We first look at the data in general, based on hours, days, and then months.
# Overlay the three current columns over the full timestamp range.
line_3plot(
    df=df,
    date='date',
    columns=['I1', 'I2', 'I3'],
    text='Current - Hourly',
    y_title='Current',
    x_title='Year',
)
# Scatter of I1 against hour of day; marker colour encodes the I1 value.
scatter = [go.Scatter(x=df['hour'],
                      y=df['I1'],
                      mode='markers',
                      marker=dict(color=df['I1'],
                                  showscale=True,
                                  colorscale='OrRd',
                                  colorbar=dict(title='I1'),
                                  size=9,
                                  opacity=0.55))]
layout = go.Layout(title={'text': "Current consumption by hour of day",
                          'y': 0.9,
                          'x': 0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   # The x values are df['hour'], so the axis is labelled
                   # 'Hour' (it previously read 'Day').
                   xaxis=dict(title='Hour'),
                   yaxis=dict(title='Current'),
                   template='plotly_dark')
fig = go.Figure(data=scatter, layout=layout)
iplot(fig)
# Bar chart of the summed I1 values for each month.
bar_plot(
    col1='month',
    col2='I1',
    agg_func='sum',
    title='Total current consumption per month',
    xtitle='Month',
    ytitle='Current',
)
# Sum of I1 for every (hour of day, weekday) pair: one row per hour and one
# column per weekday name.
df_pivot = df.pivot_table(index='hour', columns='weekday_name', values='I1',
                          aggfunc='sum')
# The explicit column list raises KeyError if a weekday is missing from the
# data; reset_index turns 'hour' back into a regular column for plotting.
df_pivot = df_pivot[['Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday',
                     'Tuesday', 'Wednesday']].reset_index()

# Traces are built in calendar order so the legend reads Monday..Sunday.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                 'Saturday', 'Sunday']
data = [
    go.Scatter(x=df_pivot['hour'],
               y=df_pivot[day],
               mode='lines',
               name=day)
    for day in weekday_order
]
# Keep the per-day names bound at module level in case later cells use them.
Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday = data

layout = go.Layout(title={'text': 'current consumption - hourly trends',
                          'y': 0.9,
                          'x': 0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis=dict(title='Hour'),
                   yaxis=dict(title='Current'),
                   template='plotly_dark')
fig = go.Figure(data=data, layout=layout)
iplot(fig)